{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "jUMerb-ChqzU",
    "colab_type": "text"
   },
   "source": [
    "# Load & Extract\n",
    "\n",
    "## Download Pretrained Weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "j6ggPFIzkjR7",
    "colab_type": "code",
    "colab": {}
   },
   "outputs": [],
   "source": [
    "!pip install -q keras-bert"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab_type": "code",
    "id": "VN1LrB1P9mhH",
    "colab": {}
   },
   "outputs": [],
   "source": [
    "!wget -q https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "-xYtjuUvhYY4",
    "colab_type": "code",
    "outputId": "cc6e84b9-6026-4443-b629-a57720efbc2d",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 119.0
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Archive:  chinese_L-12_H-768_A-12.zip\n",
      "  inflating: chinese_L-12_H-768_A-12/bert_model.ckpt.meta  \n",
      "  inflating: chinese_L-12_H-768_A-12/bert_model.ckpt.data-00000-of-00001  \n",
      "  inflating: chinese_L-12_H-768_A-12/vocab.txt  \n",
      "  inflating: chinese_L-12_H-768_A-12/bert_model.ckpt.index  \n",
      "  inflating: chinese_L-12_H-768_A-12/bert_config.json  \n"
     ]
    }
   ],
   "source": [
    "!unzip -o chinese_L-12_H-768_A-12.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "laBEGbeeiD0X",
    "colab_type": "text"
   },
   "source": [
    "## Build Model & Dictionary"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "AKC4cOstjAKK",
    "colab_type": "text"
   },
   "source": [
    "Set paths:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "lJYa8uvOiA6t",
    "colab_type": "code",
    "colab": {}
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "pretrained_path = 'chinese_L-12_H-768_A-12'\n",
    "config_path = os.path.join(pretrained_path, 'bert_config.json')\n",
    "checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')\n",
    "vocab_path = os.path.join(pretrained_path, 'vocab.txt')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "XmboeInri6bD",
    "colab_type": "text"
   },
   "source": [
    "Enable `tf.keras` by adding `TF_KERAS` to environment variables:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "id": "LOQHPPyii5Tk",
    "colab_type": "code",
    "colab": {}
   },
   "outputs": [],
   "source": [
    "os.environ['TF_KERAS'] = '1'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RUlKT2XIjJO3",
    "colab_type": "text"
   },
   "source": [
    "Build the dictionary:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "id": "NHEAc16hjIEA",
    "colab_type": "code",
    "colab": {}
   },
   "outputs": [],
   "source": [
    "import codecs\n",
    "\n",
    "token_dict = {}\n",
    "with codecs.open(vocab_path, 'r', 'utf8') as reader:\n",
    "    for line in reader:\n",
    "        token = line.strip()\n",
    "        token_dict[token] = len(token_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "QXnghzK-jRC1",
    "colab_type": "text"
   },
   "source": [
    "Build the model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "id": "ETadpJ5_jTgY",
    "colab_type": "code",
    "outputId": "11847961-846a-4711-cc0a-4f3c0cb3c8b1",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 4219.0
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n",
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py:143: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n",
      "________________________________________________________________________________________________________________________\n",
      "Layer (type)                           Output Shape               Param #       Connected to                            \n",
      "========================================================================================================================\n",
      "Input-Token (InputLayer)               (None, 512)                0                                                     \n",
      "________________________________________________________________________________________________________________________\n",
      "Input-Segment (InputLayer)             (None, 512)                0                                                     \n",
      "________________________________________________________________________________________________________________________\n",
      "Embedding-Token (TokenEmbedding)       [(None, 512, 768), (21128, 16226304      Input-Token[0][0]                       \n",
      "________________________________________________________________________________________________________________________\n",
      "Embedding-Segment (Embedding)          (None, 512, 768)           1536          Input-Segment[0][0]                     \n",
      "________________________________________________________________________________________________________________________\n",
      "Embedding-Token-Segment (Add)          (None, 512, 768)           0             Embedding-Token[0][0]                   \n",
      "                                                                                Embedding-Segment[0][0]                 \n",
      "________________________________________________________________________________________________________________________\n",
      "Embedding-Position (PositionEmbedding) (None, 512, 768)           393216        Embedding-Token-Segment[0][0]           \n",
      "________________________________________________________________________________________________________________________\n",
      "Embedding-Dropout (Dropout)            (None, 512, 768)           0             Embedding-Position[0][0]                \n",
      "________________________________________________________________________________________________________________________\n",
      "Embedding-Norm (LayerNormalization)    (None, 512, 768)           1536          Embedding-Dropout[0][0]                 \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Embedding-Norm[0][0]                    \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-1-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Embedding-Norm[0][0]                    \n",
      "                                                                                Encoder-1-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-1-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-1-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-1-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-1-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-1-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-1-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-1-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-1-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-2-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-1-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-2-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-2-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-2-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-2-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-2-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-2-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-2-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-2-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-2-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-3-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-2-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-3-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-3-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-3-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-3-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-3-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-3-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-3-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-3-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-3-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-4-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-3-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-4-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-4-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-4-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-4-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-4-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-4-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-4-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-4-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-4-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-5-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-4-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-5-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-5-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-5-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-5-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-5-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-5-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-5-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-5-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-5-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-6-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-5-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-6-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-6-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-6-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-6-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-6-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-6-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-6-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-6-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-6-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-7-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-6-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-7-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-7-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-7-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-7-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-7-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-7-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-7-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-7-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-7-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-8-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-7-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-8-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-8-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-8-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-8-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-8-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-8-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-8-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-8-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-MultiHeadSelfAttention (Mult (None, None, 768)          2362368       Encoder-8-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-MultiHeadSelfAttention-Dropo (None, None, 768)          0             Encoder-9-MultiHeadSelfAttention[0][0]  \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-MultiHeadSelfAttention-Add ( (None, 512, 768)           0             Encoder-8-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-9-MultiHeadSelfAttention-Dropout\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-MultiHeadSelfAttention-Norm  (None, 512, 768)           1536          Encoder-9-MultiHeadSelfAttention-Add[0][\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-FeedForward (FeedForward)    (None, 512, 768)           4722432       Encoder-9-MultiHeadSelfAttention-Norm[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-FeedForward-Dropout (Dropout (None, 512, 768)           0             Encoder-9-FeedForward[0][0]             \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-FeedForward-Add (Add)        (None, 512, 768)           0             Encoder-9-MultiHeadSelfAttention-Norm[0]\n",
      "                                                                                Encoder-9-FeedForward-Dropout[0][0]     \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-9-FeedForward-Norm (LayerNorma (None, 512, 768)           1536          Encoder-9-FeedForward-Add[0][0]         \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-MultiHeadSelfAttention (Mul (None, None, 768)          2362368       Encoder-9-FeedForward-Norm[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-MultiHeadSelfAttention-Drop (None, None, 768)          0             Encoder-10-MultiHeadSelfAttention[0][0] \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-MultiHeadSelfAttention-Add  (None, 512, 768)           0             Encoder-9-FeedForward-Norm[0][0]        \n",
      "                                                                                Encoder-10-MultiHeadSelfAttention-Dropou\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-MultiHeadSelfAttention-Norm (None, 512, 768)           1536          Encoder-10-MultiHeadSelfAttention-Add[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-FeedForward (FeedForward)   (None, 512, 768)           4722432       Encoder-10-MultiHeadSelfAttention-Norm[0\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-FeedForward-Dropout (Dropou (None, 512, 768)           0             Encoder-10-FeedForward[0][0]            \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-FeedForward-Add (Add)       (None, 512, 768)           0             Encoder-10-MultiHeadSelfAttention-Norm[0\n",
      "                                                                                Encoder-10-FeedForward-Dropout[0][0]    \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-10-FeedForward-Norm (LayerNorm (None, 512, 768)           1536          Encoder-10-FeedForward-Add[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-MultiHeadSelfAttention (Mul (None, None, 768)          2362368       Encoder-10-FeedForward-Norm[0][0]       \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-MultiHeadSelfAttention-Drop (None, None, 768)          0             Encoder-11-MultiHeadSelfAttention[0][0] \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-MultiHeadSelfAttention-Add  (None, 512, 768)           0             Encoder-10-FeedForward-Norm[0][0]       \n",
      "                                                                                Encoder-11-MultiHeadSelfAttention-Dropou\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-MultiHeadSelfAttention-Norm (None, 512, 768)           1536          Encoder-11-MultiHeadSelfAttention-Add[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-FeedForward (FeedForward)   (None, 512, 768)           4722432       Encoder-11-MultiHeadSelfAttention-Norm[0\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-FeedForward-Dropout (Dropou (None, 512, 768)           0             Encoder-11-FeedForward[0][0]            \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-FeedForward-Add (Add)       (None, 512, 768)           0             Encoder-11-MultiHeadSelfAttention-Norm[0\n",
      "                                                                                Encoder-11-FeedForward-Dropout[0][0]    \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-11-FeedForward-Norm (LayerNorm (None, 512, 768)           1536          Encoder-11-FeedForward-Add[0][0]        \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-MultiHeadSelfAttention (Mul (None, None, 768)          2362368       Encoder-11-FeedForward-Norm[0][0]       \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-MultiHeadSelfAttention-Drop (None, None, 768)          0             Encoder-12-MultiHeadSelfAttention[0][0] \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-MultiHeadSelfAttention-Add  (None, 512, 768)           0             Encoder-11-FeedForward-Norm[0][0]       \n",
      "                                                                                Encoder-12-MultiHeadSelfAttention-Dropou\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-MultiHeadSelfAttention-Norm (None, 512, 768)           1536          Encoder-12-MultiHeadSelfAttention-Add[0]\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-FeedForward (FeedForward)   (None, 512, 768)           4722432       Encoder-12-MultiHeadSelfAttention-Norm[0\n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-FeedForward-Dropout (Dropou (None, 512, 768)           0             Encoder-12-FeedForward[0][0]            \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-FeedForward-Add (Add)       (None, 512, 768)           0             Encoder-12-MultiHeadSelfAttention-Norm[0\n",
      "                                                                                Encoder-12-FeedForward-Dropout[0][0]    \n",
      "________________________________________________________________________________________________________________________\n",
      "Encoder-12-FeedForward-Norm (LayerNorm (None, 512, 768)           1536          Encoder-12-FeedForward-Add[0][0]        \n",
      "========================================================================================================================\n",
      "Total params: 101,677,056\n",
      "Trainable params: 0\n",
      "Non-trainable params: 101,677,056\n",
      "________________________________________________________________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "from keras_bert import load_trained_model_from_checkpoint\n",
    "\n",
    "model = load_trained_model_from_checkpoint(config_path, checkpoint_path)\n",
    "model.summary(line_length=120)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "w9kAo-ZDjZ21",
    "colab_type": "text"
   },
   "source": [
    "## Tokenization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "DDsAcJI6jdjA",
    "colab_type": "code",
    "outputId": "e21c0038-c9d0-48bd-dc18-6051bf3d7929",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51.0
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[101, 6427, 6241, 3563, 1798, 102, 0, 0, 0, 0]\n",
      "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n"
     ]
    }
   ],
   "source": [
    "from keras_bert import Tokenizer\n",
    "\n",
    "tokenizer = Tokenizer(token_dict)\n",
    "text = '语言模型'\n",
    "tokens = tokenizer.tokenize(text)\n",
    "indices, segments = tokenizer.encode(first=text, max_len=512)\n",
    "print(indices[:10])\n",
    "print(segments[:10])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "iofhUkhokBp6",
    "colab_type": "text"
   },
   "source": [
    "## Extract Feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "vQvu-SfGjiDI",
    "colab_type": "code",
    "outputId": "3fe3da96-f3f3-43c4-c9be-a752bb33740d",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 119.0
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CLS] [-0.6325103044509888, 0.20302410423755646, 0.07936538010835648, -0.03284265100955963, 0.5668085813522339]\n",
      "语 [-0.7588362097740173, 0.0965188592672348, 1.0718743801116943, 0.005039289593696594, 0.6887993812561035]\n",
      "言 [0.5477026104927063, -0.7921162843704224, 0.44435110688209534, -0.7112641930580139, 1.2048895359039307]\n",
      "模 [-0.29242411255836487, 0.6052717566490173, 0.49968627095222473, -0.42457854747772217, 0.42855408787727356]\n",
      "型 [-0.7473456263542175, 0.49431660771369934, 0.7185154557228088, -0.8723534941673279, 0.8349594473838806]\n",
      "[SEP] [-0.8741379976272583, -0.2165030986070633, 1.33883798122406, -0.10587061941623688, 0.3960897624492645]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "predicts = model.predict([np.array([indices]), np.array([segments])])[0]\n",
    "for i, token in enumerate(tokens):\n",
    "    print(token, predicts[i].tolist()[:5])"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "keras_bert_load_and_extract.ipynb",
   "version": "0.3.2",
   "provenance": [],
   "collapsed_sections": []
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
